//version 16.0
clear all
set more off
capture log close

/*
Compute descriptive statistics (mean, SD, number of observations, number of
persons) by sample period, separately for daughters, sons, mothers and
fathers in the regression samples.
*/

// The path is quoted so that a results directory containing spaces still works.
log using "${us_results}/descr_byperiod_us.log", replace

// Inclusive first/last year of each sample period.
// Period 1
local yrmin1=1985 
local yrmax1=1995

// Period 2
local yrmin2=1996
local yrmax2=2007

// Period 3
local yrmin3=2008
local yrmax3=2019



*USE the ranked MAIN SAMPLE 
// Only the variables needed below are read. The lower year bound is taken from
// yrmin1 (instead of a second hard-coded 1985) so the load filter and the
// first period can never drift apart. The path is quoted to tolerate spaces.
use *newid *LAB* AGE *cohort *LABYR* *MF* *empl* *schmax year female ///
	if year>=`yrmin1' using "${projdata}/analysis-sample-main.dta", clear

// First-order age terms: child's age (year minus cohort) shifted by 40, and
// each parent's LABYR minus the parent's cohort.
gen AGEC1=year-cohort-40 
gen f_LABAGE_1=f_LABYR-f_cohort 
gen m_LABAGE_1=m_LABYR-m_cohort 

// Powers 2 through 4 of the first-order terms.
// NOTE(review): only the *_LABAGE_1 terms are referenced later in this file;
// the higher powers look like carry-over from the regression scripts - confirm.
forval i=2/4{
	gen AGEC`i'=AGEC1^`i'
	gen f_LABAGE_`i'=f_LABAGE_1^`i'
	gen m_LABAGE_`i'=m_LABAGE_1^`i'
}



*Time periods
// Period = 1, 2 or 3 according to which [yrmin, yrmax] window the year falls in.
gen Period=.
forvalues k=1/3 {
	replace Period=`k' if year>=`yrmin`k'' & year<=`yrmax`k''
}
label define periodvals 1 "`yrmin1'-`yrmax1'" 2 "`yrmin2'-`yrmax2'" 3 "`yrmin3'-`yrmax3'", replace
label values Period periodvals 

// Rows outside every window (year after yrmax3, or missing year) would keep a
// missing Period. collapse ..., by(Period) would then emit an extra
// missing-Period group for each person type, and in the full-sample step
// tag(newid) could flag such a row and undercount persons. Drop them here so
// every later step only sees rows that belong to a defined period.
drop if missing(Period)
// only going to use periods 1, 3 for table, so could drop 2 here


*** Blank out values outside each regression subsample so that the
*** descriptives below are computed on the matching observations only.

// NOTE: Starting point is IRP sample.

// IGE sample: the child's log income is kept only when at least one parent has
// log income; each parent's log income is kept only when the child's is present.
replace LOGLABc=. if m_LOGLABc==. & f_LOGLABc==.
foreach p in m f {
	replace `p'_LOGLABc=. if LOGLABc==.
}
su LOGLABc m_LOGLABc f_LOGLABc

// Decomp samples, using children in mother OR father sample.
// A parent is in the decomposition sample when rank, age, average employment
// and schooling are all non-missing for that parent.
foreach p in m f {
	gen `p'_decomp=1 if (p`p'_LAB!=. & `p'_LABAGE_1!=. & `p'_emplavg!=. & `p'_schmax!=.)
}

// Child characteristics: keep if the child belongs to either parent's sample.
foreach v in schmax employ {
	replace `v'=. if m_decomp!=1 & f_decomp!=1
}

// Parent characteristics: keep only within that parent's own sample.
foreach p in m f {
	foreach v in schmax emplavg {
		replace `p'_`v'=. if `p'_decomp!=1
	}
}
su schmax employ m_schmax m_emplavg f_schmax f_emplavg
	 


*Children: one row of statistics per Period x female cell
preserve 
	// Give the child variables the generic names shared by all person types
	rename (LAB LOGLABc schmax employ cohort) (Income LogIncome Education Employment BirthYear)
	gen AgeAtIncome=year-BirthYear
	egen flag1=tag(newid Period) // flags one row per child within each period

	// Nobs counts rows with non-missing Income; Nindiv sums the per-child flags
	collapse ///
		(mean) MeanIncome=Income MeanLogIncome=LogIncome MeanAgeIncome=AgeAtIncome ///
		       MeanEducation=Education MeanEmployment=Employment MeanBirthYear=BirthYear ///
		(sd) SDIncome=Income SDLogIncome=LogIncome SDAgeIncome=AgeAtIncome ///
		     SDEducation=Education SDEmployment=Employment SDBirthYear=BirthYear ///
		(count) Nobs=Income ///
		(sum) Nindiv=flag1, by(Period female)

	// person: 1 when female==1, 2 when female==0, missing otherwise
	gen person=cond(female==1, 1, cond(female==0, 2, .))

	tempfile child_descr
	save `child_descr'
restore 


*Parents - Mothers, then fathers
// For each parent type, build the same statistics as for children, by Period.
// person code: 3 on the first pass (m), 4 on the second pass (f).
local x=2
foreach p in m f {
	local x=`x'+1
	preserve 
		// do not want to average over parent characteristics for parents not included in regression sample
		keep if pLAB!=. & `p'_LAB!=.

		// Map the parent-specific variables onto the generic names
		rename `p'_LAB Income
		rename `p'_LOGLABc LogIncome
		rename `p'_LABAGE_1 AgeAtIncome
		rename `p'_schmax Education
		rename `p'_emplavg Employment 
		rename `p'_cohort BirthYear

		// flag one unique obs per parent within each period
		egen flag1`p'=tag(`p'_newid Period)

		// The flag is referenced by its full name (flag1m / flag1f). The
		// previous bare "flag1" only resolved through variable abbreviation
		// and fails with "variable flag1 not found" under -set varabbrev off-.
		collapse ///
			(mean) MeanIncome=Income MeanLogIncome=LogIncome MeanAgeIncome=AgeAtIncome ///
			       MeanEducation=Education MeanEmployment=Employment MeanBirthYear=BirthYear ///
			(sd) SDIncome=Income SDLogIncome=LogIncome SDAgeIncome=AgeAtIncome ///
			     SDEducation=Education SDEmployment=Employment SDBirthYear=BirthYear ///
			(count) Nobs=Income ///
			(sum) Nindiv=flag1`p', by(Period)

		gen person=`x'

		// Quoted because the temporary directory path may contain spaces
		tempfile `p'_descr
		save "``p'_descr'"
	restore 
}


* FULL sample period statistics 

// Collapse the three periods into a single pseudo-period coded 9
replace Period=9 if inrange(year, `yrmin1', `yrmax3')

*Children: one row of statistics per female value over the whole window
preserve 
	// Give the child variables the generic names shared by all person types
	rename (LAB LOGLABc schmax employ cohort) (Income LogIncome Education Employment BirthYear)
	gen AgeAtIncome=year-BirthYear
	egen flag1=tag(newid) // flags one row per child

	// Nobs counts rows with non-missing Income; Nindiv sums the per-child flags
	collapse ///
		(mean) MeanIncome=Income MeanLogIncome=LogIncome MeanAgeIncome=AgeAtIncome ///
		       MeanEducation=Education MeanEmployment=Employment MeanBirthYear=BirthYear ///
		(sd) SDIncome=Income SDLogIncome=LogIncome SDAgeIncome=AgeAtIncome ///
		     SDEducation=Education SDEmployment=Employment SDBirthYear=BirthYear ///
		(count) Nobs=Income ///
		(sum) Nindiv=flag1, by(Period female)

	// person: 1 when female==1, 2 when female==0, missing otherwise
	gen person=cond(female==1, 1, cond(female==0, 2, .))

	tempfile child_descr9
	save `child_descr9'
restore 


*Parents - Mothers, then fathers
// Same statistics as the by-period parent step, but with one flag per parent
// overall rather than per parent within each period.
// person code: 3 on the first pass (m), 4 on the second pass (f).
local x=2
foreach p in m f {
	local x=`x'+1
	preserve 
		// do not want to average over parent characteristics for parents not included in regression sample
		keep if pLAB!=. & `p'_LAB!=.

		// Map the parent-specific variables onto the generic names
		rename `p'_LAB Income
		rename `p'_LOGLABc LogIncome
		rename `p'_LABAGE_1 AgeAtIncome
		rename `p'_schmax Education
		rename `p'_emplavg Employment 
		rename `p'_cohort BirthYear

		// flag one unique obs per parent
		egen flag1`p'=tag(`p'_newid)

		// The flag is referenced by its full name (flag1m / flag1f). The
		// previous bare "flag1" only resolved through variable abbreviation
		// and fails with "variable flag1 not found" under -set varabbrev off-.
		collapse ///
			(mean) MeanIncome=Income MeanLogIncome=LogIncome MeanAgeIncome=AgeAtIncome ///
			       MeanEducation=Education MeanEmployment=Employment MeanBirthYear=BirthYear ///
			(sd) SDIncome=Income SDLogIncome=LogIncome SDAgeIncome=AgeAtIncome ///
			     SDEducation=Education SDEmployment=Employment SDBirthYear=BirthYear ///
			(count) Nobs=Income ///
			(sum) Nindiv=flag1`p', by(Period)

		gen person=`x'

		// Quoted because the temporary directory path may contain spaces
		tempfile `p'_descr9
		save "``p'_descr9'"
	restore 
}




*Combine
// Start from the by-period child statistics, then stack the remaining
// temporary files: by-period parents first, then the full-sample files.
use `child_descr', clear
foreach part in m_descr f_descr child_descr9 m_descr9 f_descr9 {
	append using ``part''
}



*Reshape long to get list of MEANs SDs
// female is dropped; the person variable already separates the groups
drop female

// Give the two counts the SD stub so that, after reshaping, they land in the
// SD column on their own rows (rename Ns to get them below SD of char in table)
rename Nobs SDNobs
rename Nindiv SDNindiv

// One row per person x Period x Variable, with Mean and SD as columns
reshape long Mean SD, i(person Period) j(Variable) string
order Mean SD, last

*Reshape wide to get periods wide
// One row per person x Variable, with a Mean/SD pair for each Period value
reshape wide Mean SD, i(person Variable) j(Period)

*"People" 
label define personvals 1 "Daughters" 2 "Sons" 3 "Mothers" 4 "Fathers"
label values person personvals

*Label variables 
// Periods 1-3: label text is built from each period's own year bounds
forvalues k=1/3 {
	label var Mean`k' "Mean for `yrmin`k''-`yrmax`k''"
	label var SD`k' "SD for `yrmin`k''-`yrmax`k''"
}
// Pseudo-period 9 spans the first year of period 1 to the last year of period 3
label var Mean9 "Mean for `yrmin1'-`yrmax3'"
label var SD9 "SD for `yrmin1'-`yrmax3'"

*SAVE
// The path is quoted so that a results directory containing spaces still works.
save "${us_results}/descr_byperiod_us.dta", replace 

// Close the log opened at the top of this script so it is flushed and not
// left open for whatever runs next.
log close




//clear
